#' ---
#' title: "Recoding ICPP 2013 Data"
#' author: "Gento Kato & Fan Lu"
#' date: "August 23, 2023"
#' ---
#' 
#' 
#' # Preparation 
#' 

## Clean Up Space
## NOTE(review): this removes every object from the calling workspace; it is
## kept because the script has always started from an empty environment.
rm(list = ls())


## Set Working Directory (Automatically) ##
## The script location can only be queried inside RStudio, and only when the
## document has been saved (otherwise the reported path is empty). In any
## other setting (Rscript, knitr::spin, ...) the current working directory is
## left untouched instead of stopping with an error.
if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  }
}

## Import Relevant Data

## Data Directory:
## ** Location of 1159.sav downloaded from SSJDA.
## ** If downloaded to the same folder as this file, set filedir <- "./"
filedir <- "./"

### 2013 ICPP Data
## library() stops right away when haven is not installed; require() would
## only warn and let read_sav() fail later with a less helpful message.
library(haven)
## file.path() builds a valid path whether or not filedir ends with a slash.
icpp13 <- read_sav(file.path(filedir, "1159.sav"), encoding = "CP932")
attr(icpp13$AREASA, "labels") # labels of communities

#'
#' # Recoding 
#'

## Load the psych package (psych::alpha() and psych::fa() are used below).
## library() fails immediately if the package is missing, whereas require()
## would only return FALSE with a warning.
library(psych)

# Initiate New Data Set: one row per respondent, keyed by the survey id
d <- data.frame(id = icpp13$id)

#'
#' ## DEPENDENT variables of interest
#' 
#' ### The local election suffrage should be granted to foreigners.
#' 
#' * Original: 1=Strongly agree 5=Strongly disagree 6=DK 7=NA
#' * Recoded: 0=Strongly disagree, 0.5=Neither/DK, 1=Strongly agree, Missing=NA

# Original Variable
tmp <- icpp13$q09_d
table(tmp, useNA="always")
# Recoded Variable
# Strip the labelled class first (same idiom as before), then apply the
# documented recode: 7 (no answer) becomes missing and 6 (DK) is moved to
# the scale midpoint 3, so that it maps to 0.5 together with "Neither".
# Without this step codes 6/7 would end up as -0.25/-0.5.
val <- ifelse(is.na(tmp), NA, tmp)
val[val %in% 7] <- NA
val[val %in% 6] <- 3
# Reverse and rescale 1..5 to 1..0 (1 = strongly agree, 0 = strongly disagree)
d$foreignsuff <- (5 - val)/4
table(d$foreignsuff, useNA="always")
# Three-category version. Codes: 1 = Neither, 2 = Disagree, 3 = Agree.
# levels= is given explicitly so labels stay aligned even if one category
# happens to be empty.
d$foreignsuff3 <- ifelse(d$foreignsuff==0.5,1,ifelse(d$foreignsuff>0.5,3,2))
d$foreignsuff3 <- factor(d$foreignsuff3, levels=1:3,
                         labels=c("Neither","Disagree","Agree"))
table(d$foreignsuff3, useNA="always")
# Same variable with the levels ordered Disagree < Neither < Agree
d$foreignsuff3x <- factor(d$foreignsuff3, levels=c("Disagree","Neither","Agree"))
table(d$foreignsuff3x, useNA="always")

## Alternative Measurement (scale built from q09_a, q09_b, q09_d, q09_e)

## Very High Alpha
psych::alpha(icpp13[,c("q09_a","q09_b","q09_d","q09_e")])

## Row means of the same four items, tabulated for inspection only
fr_items <- c("q09_a", "q09_b", "q09_d", "q09_e")
tmp <- rowMeans(icpp13[, fr_items])
table(tmp, useNA = "always")
# d$foreignrights <- (5-ifelse(is.na(tmp), NA, tmp))/3
# table(d$foreignrights, useNA="always")
# d$foreignrights2 <- ifelse(d$foreignrights>0.5,1,0)
# table(d$foreignrights2, useNA="always")

## Factor analysis with psych::fa() defaults; the first column of the scores
## is sign-reversed and stored as the continuous measure.
## NOTE(review): the reversal presumably makes higher values mean more
## support (low raw codes = agreement) - confirm against the loadings.
tmp <- psych::fa(icpp13[, fr_items])
fr_score <- tmp$scores[, 1]
d$foreignrights <- -as.numeric(fr_score)
## Binary version: 1 when the reversed score is above zero, 0 otherwise
d$foreignrights2 <- as.numeric(d$foreignrights > 0)
table(d$foreignrights2, useNA = "always")

#'
#' ### Increase in immigrants
#'

# No value is assigned to these two columns in this script: both are created
# as all-NA placeholders and tabulated.
d[c("immigincrease", "immigincrease2")] <- NA
table(d$immigincrease, useNA = "always")
table(d$immigincrease2, useNA = "always")

## Alternative Measurement (scale built from the six q07 items)

## Very High Alpha
## (this call used to appear twice in a row with identical arguments; the
## redundant copy is removed)
psych::alpha(icpp13[,c("q07_a","q07_b","q07_c",
                       "q07_d","q07_e","q07_f")])
## Row means of the same items, tabulated for inspection only
tmp <- rowMeans(icpp13[,c("q07_a","q07_b","q07_c",
                          "q07_d","q07_e","q07_f")])
table(tmp, useNA="always")
# d$immigincrease_alt <- (4-ifelse(is.na(tmp), NA, tmp))/3
# table(d$immigincrease_alt, useNA="always")
# d$immigincrease2_alt <- ifelse(d$immigincrease_alt>0.5,1,0)
# table(d$immigincrease2_alt, useNA="always")
## Factor analysis with psych::fa() defaults; print the fit and the
## distribution of the first score column.
tmp <- psych::fa(icpp13[,c("q07_a","q07_b","q07_c",
                           "q07_d","q07_e","q07_f")])
tmp
summary(tmp$scores[,1])
## Continuous measure: sign-reversed first score column.
## NOTE(review): direction of the reversed score should be confirmed against
## the printed loadings.
d$immigincrease_alt <- as.numeric(-tmp$scores[,1])
## Binary version: 1 when the reversed score is above zero, 0 otherwise
d$immigincrease2_alt <- ifelse(d$immigincrease_alt>0,1,0)
table(d$immigincrease2_alt, useNA="always")

#'
#' ## PREDICTORS
#' 
#' ### Education (Ordinal)
#' 
#' * Recoded: 1= "<=SHS", 2="Junior College/Vocational School", 3=">=College" 
#' 

# Original
tmp <- icpp13$q33
table(tmp, useNA = "always")

# Recoded: start everyone in the top category, then overwrite.
# Codes 1-2 -> 1, code 3 -> 2, every other non-missing code -> 3,
# missing stays missing.
edu_code <- rep(3, length(tmp))
edu_code[tmp %in% c(3)] <- 2
edu_code[tmp %in% c(1, 2)] <- 1
edu_code[is.na(tmp)] <- NA

# Make it a Factor
edu_labels <- c("<=SHS",
                ">SHS & <College(4yr)",
                ">=College(4yr)")
d$edu <- factor(edu_code, labels = edu_labels)
table(d$edu, useNA = "always")

# Education Treatment: 1 for the top education category, 0 otherwise
d$edu2 <- as.numeric(d$edu == ">=College(4yr)")
table(d$edu2, useNA = "always")

#' 
#' ### Gender
#' 
#' * Original: 1=male 2=female 3=NA
#' * Recoded: 0=male, 1=female
#' 

# Original
tmp <- icpp13$q01
table(tmp, useNA="always")
# Recoded
# Only the two substantive codes are mapped (1 = male -> 0, 2 = female -> 1).
# Anything else, including the documented code 3 (no answer), stays missing
# rather than being counted as male.
d$female <- NA
d$female[tmp %in% 1] <- 0
d$female[tmp %in% 2] <- 1
table(d$female, useNA="always")
# Complement of female; missing where female is missing
d$male <- 1 - d$female

#'
#' ### Age
#'
#' * Recoded (Categorical): Young (<=30s), Middle Aged (40-50s), Elder (>=60s)

# Original
tmp <- icpp13$q02
table(tmp, useNA = "always")
d$age <- tmp

## Born year: survey year (2013) minus reported age.
## (The survey was conducted in Nov-Dec.)
d$bornyr <- 2013 - d$age

## Academic Year of Entering College
# The survey was in November-December, so respondents are assumed to have
# already turned 19 in their entry year.
d$univyr <-  2013 - (d$age-19)
unique(d$univyr[which(d$age==20)]) # If you are 20, 2012 is the year to enter

# Recoded Categorical: three age bands; missing age stays missing
age_band <- ifelse(d$age >= 60, "Elder (>=60s)",
                   ifelse(d$age >= 40, "Middle Aged (40-50s)",
                          "Young (<=30s)"))
## turn the character labels into a factor ordered from young to old
d$agecat <- factor(age_band, levels = c("Young (<=30s)",
                                        "Middle Aged (40-50s)",
                                        "Elder (>=60s)"))
table(d$agecat, useNA = "always")

# Recoded Cohort (based on the college-entry year d$univyr)
## Cohort I (-1975 Expansion)
## Cohort II (1975-1990 Stagnation)
## Cohort III (1990-2000 Expansion)
## Cohort IV (2000- Universal)
## Assigned from the latest cut-off backwards, so each earlier cut-off
## overwrites the later one. Entry years of 2010 or later, and missing
## entry years, stay NA.
cohort_code <- rep(NA_real_, nrow(d))
cohort_code[which(d$univyr < 2010)] <- 4
cohort_code[which(d$univyr < 2000)] <- 3
cohort_code[which(d$univyr < 1990)] <- 2
cohort_code[which(d$univyr < 1975)] <- 1
# d$cohort[which(d$univyr>=2010)] <- 4
## A factor variable
## NOTE(review): the labels read "-1975" / "1976-1989", while the code puts
## entry year 1975 itself into Cohort II - confirm which boundary is meant.
d$cohort <- factor(cohort_code,
                   labels = c("Cohort I (18+ in -1975)",
                              "Cohort II (18+ in 1976-1989)",
                              "Cohort III (18+ in 1990-99)",
                              "Cohort IV (18+ in 2000-09)"))
table(d$cohort, useNA = "always")

#'
#' ### Income
#'

# Original income item (q37), tabulated including missing values
tmp <- icpp13[["q37"]]
table(tmp, useNA = "always")
# Recoded
## Percentile Conversion Function
## Replaces each category of old.var by the midpoint of its cumulative-share
## interval: a category running from cumulative share a to b becomes (a+b)/2.
##   old.var     : vector of category codes
##   missing.val : codes to be treated as missing (set to NA before the
##                 shares are computed)
## Returns a vector as long as old.var; missing entries are NA.
convper <- function(old.var,missing.val){
  vals <- old.var
  vals[vals %in% missing.val] <- NA
  freq <- table(vals) # counts of the non-missing categories
  upper <- cumsum(freq/sum(freq)) # cumulative share at each category's top
  midpoint <- upper - diff(c(0,upper))/2 # step back half of the own share
  # look every observation up by its category name
  midpoint[match(vals, names(midpoint))]
}
# Income as a percentile midpoint; codes 88 and 99 are treated as missing
d$income <- convper(tmp, c(88,99))
table(d$income, useNA="always")

# Three income groups cut at the 0.33 and 0.67 percentile midpoints, plus an
# explicit "Missing" group.
d$incomecat <- NA
d$incomecat[which(d$income<=0.33)] <- "Low"
d$incomecat[which(d$income>0.33 & d$income<=0.67)] <- "Middle"
d$incomecat[which(d$income>0.67)] <- "High"
# Key "Missing" on the recoded income, not on the raw item: the raw codes
# 88/99 are not NA in tmp, so testing is.na(tmp) left them as NA here
# instead of putting them into the "Missing" level.
d$incomecat[which(is.na(d$income))] <- "Missing"
d$incomecat <- factor(d$incomecat, levels=c("Low","Middle","High","Missing"))
table(d$incomecat, useNA="always")

#'
#' ### Jobs
#'

## Working Status
tmp <- icpp13$q34_1
table(tmp, useNA = "always")

# Collapse the original codes into three groups:
#   1, 3, 4, 5 -> "Self-Employed/Full-Time/Managerial"
#   2, 6       -> "Student/Part-Time"
#   9 or NA    -> missing
#   any other  -> "Not Employed"
work_group <- rep("Not Employed", length(tmp))
work_group[tmp %in% c(2, 6)] <- "Student/Part-Time"
work_group[tmp %in% c(1, 3, 4, 5)] <- "Self-Employed/Full-Time/Managerial"
work_group[tmp %in% c(9) | is.na(tmp)] <- NA

# Factor with "Not Employed" as the first level
d$workstat <- factor(work_group,
                     levels = c("Not Employed",
                                "Student/Part-Time",
                                "Self-Employed/Full-Time/Managerial"))
table(d$workstat)

# Employment dummy: 0 for "Not Employed", 1 for the other two groups
d$employed <- as.numeric(d$workstat != "Not Employed")
table(d$employed)

#'
#' ### Marital Status
#'

# Distribution of the original marital-status item
table(icpp13$q32, useNA = "always")

# Dummy: 1 when q32 equals 1, 0 for any other non-missing code
# NOTE(review): presumably code 1 means "married" - confirm in the codebook.
d$married <- as.numeric(icpp13$q32 == 1)

#'
#' ### Urban Rural
#'

table(icpp13$AREASA, useNA = "always")

# AREASA codes grouped by community size; the position in the list is the
# category number (1 = most urban, 5 = least urban).
# NOTE(review): area code 7 is listed under both category 3 and category 4.
# Categories are applied in order, so 7 ends up in category 4 - confirm
# which one is intended.
area_groups <- list(
  c(13, 25, 30, 34, 36, 38, 41, 43, 47, 48, 49, 50, 51), # large cities
  c(20, 21, 23, 24, 28, 29, 31, 32, 33, 35, 39, 42, 44), # suburbs of large cities
  c(1, 2, 6, 7, 8, 11, 16, 17, 26, 27, 37, 40, 45),      # mid-sized cities
  c(3, 4, 5, 7, 9, 10, 12, 14, 18, 19, 22, 46),          # small cities
  c(15)                                                  # towns and villages
)
d$urban <- NA
for (k in seq_along(area_groups)) {
  d$urban[icpp13$AREASA %in% area_groups[[k]]] <- k
}

# Reverse and rescale 1..5 to 1..0, so that 1 = large city, 0 = town/village
d$urban <- (5 - d$urban) / 4
table(d$urban, useNA = "always")

#'
#' ### Area Indicator
#'

# Area indicator: one factor level per distinct AREASA code
d$area <- factor(icpp13$AREASA)

#'
#' # Saving Data
#'

#+ eval=FALSE
# Write the recoded data set to the working directory
saveRDS(object = d, file = "data_icpp13_v7.rds")